# -*- coding: utf-8 -*-
from scrapy.spiders import CrawlSpider,Rule  
from scrapy.linkextractors import LinkExtractor  
from courselist.items import CourselistItem  

class CourseurlSpider(CrawlSpider):
    """Crawl the CSDN course list and emit one item per paginated list page.

    Starting from the first course list page, the spider follows every
    link matching the pagination pattern (``.../courses/k/p<N>``) and
    yields a ``CourselistItem`` whose ``url`` field is the URL of each
    page it visits through that rule.
    """

    name = 'courseurl'
    allowed_domains = ['edu.csdn.net']
    # URL of the first course list page.
    start_urls = ['https://edu.csdn.net/courses/k']
    # Kept only for backward compatibility with code that may reference
    # ``CourseurlSpider.item``; parse_item no longer mutates this shared
    # instance (see the note there).
    item = CourselistItem()

    # Rules define which links are extracted and followed.
    # The pattern accepts both http and https because the start URL is
    # https; an "http://"-only pattern cannot match https pagination links.
    # Dots are escaped so they match a literal "." only.
    rules = (
        Rule(
            LinkExtractor(
                allow=(r'https?://edu\.csdn\.net/courses/k/p[0-9]+',),
            ),
            callback='parse_item',
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Build an item recording the URL of a crawled list page.

        Args:
            response: The response for a page matched by ``rules``.

        Returns:
            A new ``CourselistItem`` with ``url`` set to ``response.url``.
        """
        # Create a fresh item per response: reusing one shared instance
        # would let a later response overwrite the ``url`` of items that
        # were already returned and may still be in the item pipeline.
        item = CourselistItem()
        item['url'] = response.url
        return item
